Code
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
library(svglite)
source("scripts/metadata_colors.R")Libraries
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
library(svglite)
source("scripts/metadata_colors.R")Paths
metadata_path <-
"data/processed/metadata_ashton_desj_all_weavepop_H99.csv"
ploidy_path <-
"results/tables/ploidy_from_plots.tsv"
duplications_path <-
"results/tables/duplications.tsv"
merged_tree_path <-
"data/processed/tree_merged.newick"
tree_merged_duplications_path <-
"results/trees_dups/tree_merged_duplications.svg"
tree_merged_duplications_only_duplicated <-
"results/trees_dups/tree_merged_duplications_only_duplicated.svg"Load the necessary data
metadata <- read.csv(
metadata_path,
header = TRUE)%>%
select(strain, everything())Get one dataframe for each variable to be plotted as a separate metadata column in the tree
metadata$vni_subdivision <- factor(metadata$vni_subdivision,
levels = c("VNIa-4", "VNIa-5", "VNIa-32",
"VNIa-93", "VNIa-X", "VNIa-Y", "VNIb",
"VNIc", "VNIa-outlier"))
metadata$country_of_origin <- factor(metadata$country_of_origin,
levels = names(country_colors))
sublineage <- metadata %>%
filter(lineage == "VNI")%>%
select(strain, vni_subdivision)%>%
column_to_rownames("strain")%>%
droplevels()
lineage <- metadata %>%
select(strain, lineage)%>%
column_to_rownames("strain")
dataset <- metadata %>%
select(strain, dataset)%>%
column_to_rownames("strain")
source <- metadata %>%
select(strain, source)%>%
column_to_rownames("strain")
country <- metadata %>%
select(strain, country_of_origin)%>%
column_to_rownames("strain") ploidy <- read.delim(
ploidy_path,
header=TRUE,
sep="\t")%>%
select(strain, ploidy)%>%
filter(ploidy != "haploid")%>%
distinct()%>%
column_to_rownames("strain")duplications <- read.delim(
duplications_path,
sep = "\t", header = TRUE, stringsAsFactors = TRUE)duplications_full <- duplications %>%
select(strain, chromosome) %>%
distinct()Make matrix of duplicated chromosomes
dup_chroms <- duplications_full %>%
select(strain, chromosome)%>%
mutate(duplicated_full = 1)%>%
arrange(chromosome)%>%
pivot_wider(names_from = chromosome, values_from = duplicated_full, values_fill = 0)%>%
column_to_rownames("strain")%>%
mutate(across(everything(), ~ ifelse(. == 1, cur_column(),"Euploid")))
euploid_strain <- metadata %>%
filter(!strain %in% duplications_full$strain)%>%
select(strain)
for (chrom in colnames(dup_chroms)){
euploid_strain[chrom] <- "Euploid"
}
dup_chroms <- euploid_strain %>%
column_to_rownames("strain") %>%
bind_rows(dup_chroms)tree <- read.tree(merged_tree_path)Remove tips that are not in metadata$strain
tree <- drop.tip(tree, setdiff(tree$tip.label, metadata$strain))Get the node number of the Most Recent Common Ancestor of each lineage
VNI_node <- getMRCA(tree, c("Tu241-1","UI_31647-2"))
VNII_node <- getMRCA(tree, c("C2","C12"))
VNBI_node <- getMRCA(tree, c("Tu229-1","Ftc267-2"))
VNBII_node <- getMRCA(tree, c("MW-RSA3321","MW-RSA3179"))
nodes_lineages <- data.frame(
lineage = c("VNI", "VNII", "VNBI", "VNBII"),
mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)VNIa4_node <- getMRCA(tree, c("04CN-30-008","UI_31647-2"))
VNIa5_node <- getMRCA(tree, c("BMD852","14936_1#45"))
VNIa93_node <- getMRCA(tree, c("04CN-65-080","04CN-65-002"))
VNIa32_node <- getMRCA(tree, c("BMD942","BMD2801"))
VNIaX_node <- getMRCA(tree, c("Bt48","04CN-63-007"))
VNIaY_node <- getMRCA(tree, c("04CN-65-073","Bt138"))
VNIb_node <- getMRCA(tree, c("04CN-65-096","MW-RSA722"))
VNIc_node <- getMRCA(tree, c("Bt20","Bt11"))
nodes_vnisublineages <- data.frame(
sublineage = c(
"VNIa-4", "VNIa-5", "VNIa-93",
"VNIa-32", "VNIa-X", "VNIa-Y",
"VNIb", "VNIc"),
mrca = c(
VNIa4_node, VNIa5_node, VNIa93_node,
VNIa32_node, VNIaX_node, VNIaY_node,
VNIb_node, VNIc_node))nodes_sublineages <- data.frame(
sublineage = c("VNI", "VNII", "VNBI", "VNBII",
"VNIa-4", "VNIa-5", "VNIa-93",
"VNIa-32", "VNIa-X", "VNIa-Y",
"VNIb", "VNIc"),
mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node,
VNIa4_node, VNIa5_node, VNIa93_node,
VNIa32_node, VNIaX_node, VNIaY_node,
VNIb_node, VNIc_node))chrom_colors <- brewer.pal(7, "Paired")
names(chrom_colors) <- c("chr01", "chr04",
"chr06", "chr09",
"chr12","chr13", "chr14")
chrom_dup_colors <- c(chrom_colors, "Euploid" = "grey93")m <- ggtree(tree,
ladderize = TRUE,
layout = "circular",
branch.length = "none",
size = 0.1) %<+% metadata +
geom_tiplab(color = "black", size = 0.5, offset = 0.01)+
geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
size = 2, , fontface = "bold",
hjust = 1.25, vjust = -0.5)+
geom_hilight(data=nodes_vnisublineages,
aes(node=mrca, fill=sublineage), alpha = 0.8)+
scale_fill_manual(name = "Sublineage", values = sublineage_shading)+
guides(fill = FALSE)+
new_scale_fill()+
geom_tree(size = 0.1)+
geom_tippoint(aes(color = country_of_origin),
size = 0.3)+
scale_color_manual(name = "Country", values = country_colors,
limits = levels(country$country_of_origin))+
guides(color = guide_legend(override.aes = list(size = 5), order = 1, ncol = 2))
m1 <- gheatmap(m, source, width=.05, colnames=FALSE, offset=3) +
scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
guides(fill = guide_legend(order = 2))+
new_scale_fill()
m2 <- gheatmap(m1, dup_chroms, width=.32, colnames = FALSE, offset=6) +
scale_fill_manual(values = chrom_dup_colors,
name="Duplicated\nchromosomes",
na.translate = FALSE )+
guides(fill = guide_legend(order = 5))+
geom_cladelab(data = nodes_vnisublineages,
mapping = aes(node = mrca, label = sublineage),
align = TRUE, face = "bold",
angle = "auto", offset = 20)+
theme(legend.position = "bottom",
legend.direction = "vertical")
m2m2 <- gheatmap(m1, dup_chroms, width=.32, colnames = FALSE, offset=5,) +
scale_fill_manual(values = chrom_dup_colors,
name="Duplicated\nchromosomes",
na.translate = FALSE )+
guides(fill = guide_legend(order = 3))+
new_scale_fill()
m3 <- gheatmap(m2, ploidy, width=.05, colnames=FALSE, offset=17) +
scale_fill_brewer(palette = "Set1",
name="Ploidy", na.translate = FALSE)+
guides(fill = guide_legend(order = 4))+
new_scale_fill()+
geom_cladelab(data = nodes_vnisublineages,
mapping = aes(node = mrca, label = sublineage),
align = TRUE, face = "bold",
angle = "auto", offset = 25)+
theme(legend.position = "bottom",
legend.direction = "vertical")
m3ggsave(tree_merged_duplications_path, m3, height = 15, width = 15, units = "in", dpi = 900)Warning: Removed 2107 rows containing missing values or values outside the scale range
(`geom_text()`).
aneuploid <- duplications_full %>%
group_by(strain)%>%
summarise(chromosome = str_c(chromosome, collapse="_")) %>%
column_to_rownames("strain")md <- gheatmap(m1, aneuploid, width=.05, colnames = FALSE, offset=5.5) +
scale_fill_brewer(name = "Duplicated\nchromosomes", palette = "Paired", na.value = "gray90")+
guides(fill = guide_legend(order = 5))+
geom_cladelab(data = nodes_vnisublineages,
mapping = aes(node = mrca, label = sublineage),
align = TRUE, face = "bold",
angle = "auto", offset = 7)
mdm3 <- gheatmap(m1, aneuploid, width=.05, colnames = FALSE, offset=5.5) +
scale_fill_brewer(name = "Duplicated\nchromosomes", palette = "Paired", na.value = "gray90")+
guides(fill = guide_legend(order = 5))+
new_scale_fill()
m4 <- gheatmap(m3, ploidy, width=.05, colnames=FALSE, offset=8) +
scale_fill_brewer(palette = "Set1",
name="Ploidy", na.translate = FALSE)+
guides(fill = guide_legend(order = 4))+
new_scale_fill()+
geom_cladelab(data = nodes_vnisublineages,
mapping = aes(node = mrca, label = sublineage),
align = TRUE, face = "bold",
angle = "auto", offset = 10)+
theme(legend.position = "bottom",
legend.direction = "vertical")
m4ggsave(tree_merged_duplications_path, m2, height = 15, width = 15, units = "in", dpi = 900)Warning: Removed 2107 rows containing missing values or values outside the scale range
(`geom_text()`).
m <- ggtree(tree,
ladderize = TRUE,
layout = "circular",
branch.length = "none",
size = 0.1) %<+% metadata +
geom_tiplab(color = "black", size = 0.5, offset = 0.01)+
geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
size = 2, , fontface = "bold",
hjust = 1.25, vjust = -0.5)+
geom_hilight(data=nodes_vnisublineages,
aes(node=mrca, fill=sublineage), alpha = 0.8)+
scale_fill_manual(name = "Sublineage", values = sublineage_shading)+
guides(fill = FALSE)+
new_scale_fill()+
geom_tree(size = 0.1)+
geom_tippoint(aes(color = dataset), size = 0.3)+
scale_color_manual(name = "Dataset", values = dataset_colors)+
guides(color = guide_legend(override.aes = list(size = 5), order = 1))
m1 <- gheatmap(m, country, width=.05, colnames=FALSE, offset=3) +
scale_fill_manual(values = country_colors, name="Country",
na.translate = FALSE, limits = levels(country$country_of_origin))+
guides(fill = guide_legend(order = 2, ncol = 2))+
new_scale_fill()
m2 <- gheatmap(m1, source, width=.05, colnames=FALSE, offset=4) +
scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
guides(fill = guide_legend(order = 4))+
new_scale_fill()
m3 <- gheatmap(m2, dup_chroms, width=.32, colnames = FALSE, offset=7) +
scale_fill_manual(values = chrom_dup_colors,
name="Duplicated\nchromosomes",
na.translate = FALSE )+
guides(fill = guide_legend(order = 5))+
geom_cladelab(data = nodes_vnisublineages,
mapping = aes(node = mrca, label = sublineage),
align = TRUE, face = "bold",
angle = "auto", offset = 21)+
theme(legend.position = "bottom",
legend.direction = "vertical")
m3keep_strains <- c(levels(duplications_full$strain), "H99", "Bt22", "Bt89")
tree_dups <- drop.tip(tree, setdiff(tree$tip.label, keep_strains))
sublineage <- sublineage %>%
filter(rownames(.) %in% keep_strains)%>%
droplevels()Get the node number of the Most Recent Common Ancestor of each lineage
VNI_node <- getMRCA(tree_dups, c("Bt139","H99"))
VNII_node <- getMRCA(tree_dups, c("8-1","C12"))
VNBI_node <- getMRCA(tree_dups, c("Bt22","NRHc5045.ENR.CLIN.ISO"))
VNBII_node <- getMRCA(tree_dups, c("Bt109","Bt89"))
nodes_lineages <- data.frame(
lineage = c("VNI", "VNII", "VNBI", "VNBII"),
mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)VNIa4_node <- getMRCA(tree_dups, c("20427_3#26","20427_4#13"))
VNIa5_node <- getMRCA(tree_dups, c("Bt139","Bt141"))
VNIa93_node <- getMRCA(tree_dups, c("04CN-64-024","04CN-64-011"))
VNIa32_node <- getMRCA(tree_dups, c("04CN-65-072","In2632"))
VNIb_node <- getMRCA(tree_dups, c("H99","MW-RSA6134"))
VNIc_node <- getMRCA(tree_dups, c("LP-RSA3042","PMHc1031A.ENR.INI.LP"))
nodes_vnisublineages <- data.frame(
sublineage = c(
"VNIa-4", "VNIa-5", "VNIa-93",
"VNIa-32",
"VNIb", "VNIc"),
mrca = c(
VNIa4_node, VNIa5_node, VNIa93_node,
VNIa32_node,
VNIb_node, VNIc_node))nodes_sublineages <- data.frame(
sublineage = c("VNI", "VNII", "VNBI",# "VNBII",
"VNIa-4", "VNIa-5", "VNIa-93",
"VNIa-32", "VNIa-X", "VNIa-Y",
"VNIb", "VNIc"),
mrca = c(VNI_node, VNII_node, VNBI_node, #VNBII_node,
VNIa4_node, VNIa5_node, VNIa93_node,
VNIa32_node, VNIaX_node, VNIaY_node,
VNIb_node, VNIc_node))m <- ggtree(tree_dups,
ladderize = TRUE,
layout = "rectangular",
branch.length = "none",
size = 0.1) %<+% metadata +
geom_tiplab(color = "black", size = 4, offset = 0.1)+
geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
size = 2, , fontface = "bold",
hjust = 1.25, vjust = -0.5)+
geom_hilight(data=nodes_vnisublineages,
aes(node=mrca, fill=sublineage), alpha = 0.8)+
scale_fill_manual(name = "Sublineage", values = sublineage_shading)+
guides(fill = FALSE)+
new_scale_fill()+
geom_tree(size = 0.1)+
geom_tippoint(aes(color = dataset), size = 3)+
scale_color_manual(name = "Dataset", values = dataset_colors)+
guides(color = guide_legend(override.aes = list(size = 5), order = 1))
m1 <- gheatmap(m, country, width=.03, colnames=FALSE, offset=2) +
scale_fill_manual(values = country_colors, name="Country",
na.translate = FALSE, limits = levels(country$country_of_origin))+
guides(fill = guide_legend(order = 2, ncol = 2))+
new_scale_fill()
m2 <- gheatmap(m1, source, width=.03, colnames=FALSE, offset=2.5) +
scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
guides(fill = guide_legend(order = 4))+
new_scale_fill()
m3 <- gheatmap(m2, dup_chroms, width=.25, colnames = FALSE, offset=3) +
scale_fill_manual(values = chrom_dup_colors,
name="Duplicated\nchromosomes",
na.translate = FALSE )+
guides(fill = guide_legend(order = 5))+
geom_cladelab(data = nodes_vnisublineages,
mapping = aes(node = mrca, label = sublineage),
align = TRUE, face = "bold",
angle = 0, offset = 6)+
theme(legend.position = "bottom",
legend.direction = "vertical")
m3ggsave(tree_merged_duplications_only_duplicated, m3, height = 8, width = 13, units = "in", dpi = 900)Warning: Removed 73 rows containing missing values or values outside the scale range
(`geom_text()`).